cd C:\Users\chausman\Dropbox\Legacy_utility_costs\Data_package_to_post
 
/////////////////////////
//INTRODUCTION


//consistent growth or loss
use data/legacy_utility_data_controls, clear
unique company_id if regime_length>=5 & regime_length!=. & resi_dummy_grow_1==1
unique company_id if regime_length>=5 & regime_length!=. & resi_dummy_grow_1==0

//change in number of customers
use data/legacy_utility_data_controls, clear
sum resi_total_custom if year==1997	
	local temp1=r(mean)*r(N)
sum resi_total_custom if year==2019	
	local temp2=r(mean)*r(N)	
disp `temp2'/`temp1'-1
sum comm_total_custom if year==1997	
	local temp3=r(mean)*r(N)
sum comm_total_custom if year==2019	
	local temp4=r(mean)*r(N)
disp (`temp2'+`temp4')/(`temp1'+`temp3')-1
	
//value of residential + commercial sales in 2019
egen total_revenue = rowtotal(resi_sales_revenue_nominal resi_trans_revenue_nominal ///
	comm_sales_revenue_nominal comm_trans_revenue_nominal)
sum total_revenue if year==2019 //68 billion
	disp r(mean)*r(N)/10^9
drop total_revenue
	
//customers in 2019
use data/legacy_utility_data_controls, clear
sum resi_total_custom if year==2019	
	disp r(mean)*r(N)/10^6 //70 million	(out of 123 million households in US)
sum comm_total_custom if year==2019	
	disp r(mean)*r(N)/10^6 //5 million
	
//alabama gas corp
use data/legacy_utility_data_controls, clear
sc resi_total_custom year if strmatch(company_name,"*ALABAMA GAS CORP*") & state_abbr=="AL"
sc resi_total_custom year if strmatch(company_name,"*ALABAMA GAS CORP*") & state_abbr=="AL" & year>=2000 & year<=2010
sum resi_total_custom if strmatch(company_name,"*ALABAMA GAS CORP*") & state_abbr=="AL" & year==2010
	local temp2010=r(mean)
sum resi_total_custom if strmatch(company_name,"*ALABAMA GAS CORP*") & state_abbr=="AL" & year==2000
	local temp2000=r(mean)	
	disp `temp2010'/`temp2000'-1
sc mmiles year if strmatch(company_name,"*ALABAMA GAS CORP*") & state_abbr=="AL"


	
/////////////////////////
//DATA

//large utility in California
list year resi_total_cust company_name state_abbr if resi_total_cust>5*10^6 & resi_total_cust!=.

//In the raw miles data, utilities vary in the level of precision they report, 
//even across years within the same utility.
use data/legacy_utility_data_controls, clear
sort company_id year 
bysort id: egen minmmiles=min(mmiles)
gen roundedmmiles=round(mmiles,1)
gen diff=mmiles-roundedmmiles
//br company_id year mmiles rounded diff if minmmiles<15

////////////
//unable to identify structure
use data/legacy_utility_data_controls, clear
gen muni_temp=max(muni_narrow, coop_narrow)
gen iou_temp=max(iou_narrow, private_narrow)
sum resi_total_custom if muni_temp+iou>1 & muni_temp+iou!=. //none; good
sum resi_total_custom if muni_temp+iou==0 & resi_total_custom!=0, detail
	local temp1=r(mean)
	local temp2=r(N)
	local temp1b=r(p50)
sum resi_total_custom if resi_total_custom!=0, detail
	local temp3=r(mean)
	local temp4=r(N)
	local temp3b=r(p50)
disp `temp1'/`temp3' //mean is 5% of overall mean
disp `temp1b'/`temp3b' //median is 159, which is 14%
disp `temp2'/`temp4' //2.6% of observations
disp `temp1'*`temp2'/(`temp3'*`temp4') //0.1% of customers

////////////////
//PHMSA/EIA matching
use data/legacy_utility_data_controls, clear
count if resi_cust1!=0 & resi_cust1!=.
	local temp1=r(N)
count if resi_cust1!=0 & resi_cust1!=. & mmiles!=.
	local temp2=r(N)
	disp `temp2'/`temp1' //83%
sum resi_cust1 if resi_cust1!=0 & resi_cust1!=.
	local temp1=r(mean)*r(N)
sum resi_cust1 if resi_cust1!=0 & resi_cust1!=. & mmiles!=.
	local temp2=r(mean)*r(N)
	disp `temp2'/`temp1' //87%

///////////////
//CUT-OFFS FOR SERVICE TERRITORY CHANGES
use data/legacy_utility_data_controls, clear
xtset id year
sum d.lnresi_cust1 d.lncomm_cust1 d.lnmmiles, detail



////////////////
//BUNDLED VS RETAIL CHOICE
use data/legacy_utility_data_controls, clear

bysort id: egen minmmiles=min(mmiles)
xtset id year 
gen dlnresi_cust1=d.lnresi_cust1
	compare dlnresi_cust1 dlnresi_grow1 if resi_dummy_grow_1==1
	compare dlnresi_cust1 dlnresi_shrink1 if resi_dummy_grow_1==0
gen dlncomm_cust1=d.lncomm_cust1
	compare dlncomm_cust1 dlncomm_grow1 if comm_dummy_grow_1==1
	compare dlncomm_cust1 dlncomm_shrink1 if comm_dummy_grow_1==0
gen resi_flat=0
	replace resi_flat=1 if resi_cust1==l.resi_cust1
	replace resi_dummy_grow_1=0 if resi_flat==1 & resi_dummy_grow_1!=. //not needed
gen comm_flat=0
	replace comm_flat=1 if comm_cust1==l.comm_cust1
	replace comm_dummy_grow_1=0 if comm_flat==1 & comm_dummy_grow_1!=.
reg dlnmmiles ///
	dlnresi_cust1 ///
	dlncomm_cust1 ///
	if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 ///
	& regime_length>=2 ///	
	& minmmiles>=20 & abs(dlnmmiles)<=1 ///
	, cluster(id) 
unique id if e(sample)
local temp1=r(unique)
local temp2=r(N)	
reg dlnmmiles ///
	dlnresi_cust1 ///
	dlncomm_cust1 ///
	if /* portion_resi_sales>=.9*/ ///
	/*&*/ abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 ///
	& regime_length>=2 ///	
	& minmmiles>=20 & abs(dlnmmiles)<=1 ///
	, cluster(id) 
unique id if e(sample)
local temp3=r(unique)	
local temp4=r(N)		
disp 1-`temp1'/`temp3' //drop 2% of utilities
disp `temp3'-`temp1' //drop 15 utilities
disp 1-`temp2'/(`temp4') //drop 3% of observations
disp `temp4'-`temp2' //drop 326 observations 
	
	
	
/////////////////////////
//SUMMARY STATS AND FIGURES
use data/legacy_utility_data_controls, clear

gen nonzero_resi_trans_revenue=resi_trans_revenue_real if resi_trans_revenue_real>0
gen resi_total_revenue_real=resi_sales_revenue_real+resi_trans_revenue_real
gen nonzero_comm_trans_revenue=comm_trans_revenue_real if comm_trans_revenue_real>0
gen comm_total_revenue_real=comm_sales_revenue_real+comm_trans_revenue_real

//outliers we don't use in the main sample:
foreach v in resi comm{
foreach x in nr1 nrpc1 nrpq1{
	replace `v'_`x'=. if `v'_`x'<=0
}
}
replace resi_nrpc1_real=. if resi_nrpc1_real>350*10 //an order of magnitude larger than the median
replace resi_nrpq1_real=. if resi_nrpq1_real>6*10 //an order of magnitude larger than the median
replace comm_nrpc1_real=. if comm_nrpc1_real>1300*10
replace comm_nrpq1_real=. if comm_nrpq1_real>5*10

foreach v in resi_total_cust resi_sales_cust resi_trans_cust resi_nrpc1_real ///
	comm_total_cust comm_sales_cust comm_trans_cust comm_nrpc1_real ///
	mmiles {
	replace `v'=`v'/1000 //thousands
}
foreach v in resi_total_volume resi_sales_volume resi_trans_volume ///
	resi_total_revenue resi_sales_revenue_real resi_trans_revenue_real ///
	nonzero_resi_trans_revenue resi_nr1_real ///
	comm_total_volume comm_sales_volume comm_trans_volume ///
	comm_total_revenue comm_sales_revenue_real comm_trans_revenue_real ///
	nonzero_comm_trans_revenue comm_nr1_real  {
	replace `v'=`v'/10^6 //millions
}

label variable resi_total_cust "Residential: \\ \hspace{.2in} Customers, '000s"
label variable portion_resi_sales "\hspace{.2in} Bundled customers, proportion"
label variable resi_dummy_grow_1 "\hspace{.2in} Dummy, customer base growing"
label variable resi_total_volume "\hspace{.2in} Sales, Bcf"
label variable resi_total_revenue_real "\hspace{.2in} Revenue, '000,000s"
label variable resi_sales_price_real "\hspace{.2in} Average price, \\$/mcf"
label variable resi_nr1_real "\hspace{.2in} Net revenue, '000,000s"
label variable resi_nrpc1_real "\hspace{.2in} \hspace{.2in}Per customer, '000s"
label variable resi_nrpq1_real "\hspace{.2in}\hspace{.2in}  Per mcf"

label variable comm_total_cust "Commercial: \\ \hspace{.2in}Customers, '000s"
label variable portion_comm_sales "\hspace{.2in} Bundled customers, proportion"
label variable comm_dummy_grow_1 "\hspace{.2in} Dummy, customer base growing"
label variable comm_total_volume "\hspace{.2in} Sales, Bcf"
label variable comm_total_revenue_real "\hspace{.2in} Revenue, '000,000s"
label variable comm_sales_price_real "\hspace{.2in} Average price, \\$/mcf"
label variable comm_nr1_real "\hspace{.2in} Net revenue, '000,000s"
label variable comm_nrpc1_real "\hspace{.2in} \hspace{.2in} Per customer, '000s"
label variable comm_nrpq1_real "\hspace{.2in} \hspace{.2in} Per mcf"

label variable citygate_real "Citygate price, \\$/mcf"
label variable mmiles "Miles of pipeline, '000s"

label variable iou "=1 if investor-owned utility"

gen muni=1-iou //rather than muni_narrow
label variable muni "=1 if municipal utility"

//main
//keep service territory changes in here because they are fine in levels; just represent new ids in differences
//similarly keep regime_length==1 here
//similarly keep minmmiles<20, dlnmmiles>1
eststo clear
eststo: estpost summ ///
	resi_total_cust portion_resi_sales resi_dummy_grow_1 ///
	resi_total_volume resi_sales_price_real ///
	resi_total_revenue ///
	resi_nr1_real resi_nrpc1_real ///
	resi_nrpq1_real ///
	citygate_real ///
	mmiles ///
	iou muni ///
	if portion_resi_sales>=.9, detail
estout using summarystats.tex, ///
	cells("count(fmt(%9.0fc)) mean(fmt(2)) p50(fmt(2)) sd(fmt(2)) min(fmt(2)) max(fmt(2))") ///
	style(tex) ///
	label ///
	collabels(none) mlabels(none) ///
	replace
unique company_id if portion_resi_sales>=.9 //1565
sum year if portion_resi_sales>=.9
tab year if portion_resi_sales>=.9
count if portion_resi_sales>=.9
	disp r(N)/(2019-1997+1) //1278

//missing comm nr obs:
sum comm_total_cust if comm_nr1_real==., detail
/*br company_name year comm_total_volume comm_sales_price_real citygate_real ///
	if comm_nr1_real==. & comm_total_cust!=0	 //nr would be negative*/
sum comm_sales_price_real citygate_real ///
	if comm_nr1_real==. & comm_total_cust!=0	 //nr would be negative
count if comm_nr1_real==. & comm_total_cust==0
count if comm_nr1_real==. & comm_total_cust!=0
//around half are missing because no comm customers, around half because nr would be negative 


//appendix
//adding commercial
eststo clear
eststo: estpost summ ///
	resi_total_cust portion_resi_sales resi_dummy_grow_1 ///
	resi_total_volume resi_sales_price_real ///
	resi_total_revenue ///
	resi_nr1_real resi_nrpc1_real ///
	resi_nrpq1_real ///
	comm_total_cust portion_comm_sales comm_dummy_grow_1 ///
	comm_total_volume comm_sales_price_real ///
	comm_total_revenue ///
	comm_nr1_real comm_nrpc1_real ///
	comm_nrpq1_real ///
	citygate_real ///
	mmiles ///
	iou muni ///
	if portion_resi_sales>=.9, detail
estout using summarystatsappendix.tex, ///
	cells("count(fmt(%9.0fc)) mean(fmt(2)) p50(fmt(2)) sd(fmt(2)) min(fmt(2)) max(fmt(2))") ///
	style(tex) ///
	label ///
	collabels(none) mlabels(none) ///
	replace
unique company_id if portion_resi_sales>=.9 //1565
sum year if portion_resi_sales>=.9
tab year if portion_resi_sales>=.9
count if portion_resi_sales>=.9
	disp r(N)/(2019-1997+1) //1278 -- around 1300 per year



	
	
//////////////////////////
//MAIN TEXT: SUMMARY STATS ON GROWTH BY TYPE, FOR ESTIMATION SAMPLE
use data/legacy_utility_data_controls, clear
xtset id year	
reg dlnresi_nr1 dlnresi_grow1 dlnresi_shrink1 resi_dummy_grow_1 ///
	if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 
	//mostly bundled; no service territory changes
	//ignoring outliers for this definition
gen sample=e(sample)
xtset id year

keep if sample==1
file open descriptive using descriptive.tex, write replace
reg resi_dummy_grow_1, cluster(id)
	file write descriptive "All utility-years & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg resi_dummy_grow_1 if iou==1, cluster(id)
	file write descriptive "By ownership type: \\ \hspace{.2in} Investor-owned utilities   & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg resi_dummy_grow_1 if iou==0, cluster(id)
	file write descriptive "\hspace{.2in} Municipally-owned utilities  & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
bysort id: gen temp=resi_cust1 if [_n]==1
	bysort id: egen temp2=max(temp)
	reg resi_dummy_grow_1 if temp2>=10^6 & temp2!=., cluster(id)
		file write descriptive "By number of residential customers in first year: \\ \hspace{.2in} 1 million or more  & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg resi_dummy_grow_1 if temp2>=10^5 & temp2<10^6 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 100,000 -- 1 million & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg resi_dummy_grow_1 if temp2>=10^4 & temp2<10^5 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 10,000 -- 100,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg resi_dummy_grow_1 if temp2>=10^3 & temp2<10^4 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 1,000 -- 10,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg resi_dummy_grow_1 if temp2>=10^2 & temp2<10^3 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 100 -- 1,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg resi_dummy_grow_1 if temp2<10^2 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 1 -- 100 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg resi_dummy_grow_1 if year<2008, cluster(id)
	file write descriptive "By time period: \\ \hspace{.2in} 1997--2007 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg resi_dummy_grow_1 if year>=2008, cluster(id)
	file write descriptive "\hspace{.2in} 2008-2019 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==1
tab state_abbr if division==1
	reg resi_dummy_grow_1 if division==1, cluster(id)
		file write descriptive "By geographic region: \\ \hspace{.2in} New England (CT, MA, ME, NH, RI, VT) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==9
tab state_abbr if division==9
	reg resi_dummy_grow_1 if division==9, cluster(id)
		file write descriptive "\hspace{.2in} Pacific (CA, OR, WA) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==8
tab state_abbr if division==8
	reg resi_dummy_grow_1 if division==8, cluster(id)
		file write descriptive "\hspace{.2in} Mountain (AZ, CO, ID, MT, NM, NV, UT, WY) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==2
tab state_abbr if division==2
	reg resi_dummy_grow_1 if division==2, cluster(id)
		file write descriptive "\hspace{.2in} Middle Atlantic (NJ, NY, PA) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==3
tab state_abbr if division==3
	reg resi_dummy_grow_1 if division==3, cluster(id)
		file write descriptive "\hspace{.2in} East North Central (IL, IN, MI, OH, WI) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==4
tab state_abbr if division==4
	reg resi_dummy_grow_1 if division==4, cluster(id)
		file write descriptive "\hspace{.2in} West North Central (IA, KS, MN, MO, ND, NE, SD) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==6
tab state_abbr if division==6
	reg resi_dummy_grow_1 if division==6, cluster(id)
		file write descriptive "\hspace{.2in} East South Central (AL, KY, MS, TN) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==5
tab state_abbr if division==5
	reg resi_dummy_grow_1 if division==5, cluster(id)
		file write descriptive "\hspace{.2in} South Atlantic (DC, DE, FL, GA, MD, NC, SC, VA, WV) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==7
tab state_abbr if division==7
	reg resi_dummy_grow_1 if division==7, cluster(id)
		file write descriptive "\hspace{.2in} West South Central (AR, LA, OK, TX) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
file close descriptive



//////////////////////////
//APPENDIX: SUMMARY STATS ON GROWTH BY TYPE, COMMERCIAL
use data/legacy_utility_data_controls, clear
xtset id year	
reg dlncomm_nr1 dlncomm_grow1 dlncomm_shrink1 comm_dummy_grow_1 ///
	if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 
	//mostly bundled; no service territory changes
	//ignoring outliers for this definition
gen sample=e(sample)
xtset id year

keep if sample==1
file open descriptive using descriptivecommercial.tex, write replace
reg comm_dummy_grow_1, cluster(id)
	file write descriptive "All utility-years & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg comm_dummy_grow_1 if iou==1, cluster(id)
	file write descriptive "By ownership type: \\ \hspace{.2in} IOUs   & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg comm_dummy_grow_1 if iou==0, cluster(id)
	file write descriptive "\hspace{.2in} Munis  & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
bysort id: gen temp=resi_cust1 if [_n]==1
	bysort id: egen temp2=max(temp)
	reg comm_dummy_grow_1 if temp2>=10^6 & temp2!=., cluster(id)
		file write descriptive "By size in first year: \\ \hspace{.2in} 1 million or more  & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg comm_dummy_grow_1 if temp2>=10^5 & temp2<10^6 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 100,000 -- 1 million & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg comm_dummy_grow_1 if temp2>=10^4 & temp2<10^5 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 10,000 -- 100,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg comm_dummy_grow_1 if temp2>=10^3 & temp2<10^4 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 1,000 -- 10,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg comm_dummy_grow_1 if temp2>=10^2 & temp2<10^3 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 100 -- 1,000 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
	reg comm_dummy_grow_1 if temp2<10^2 & temp2!=., cluster(id)
		file write descriptive "\hspace{.2in} 1 -- 100 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg comm_dummy_grow_1 if year<2008, cluster(id)
	file write descriptive "By time period: \\ \hspace{.2in} 1997--2007 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
reg comm_dummy_grow_1 if year>=2008, cluster(id)
	file write descriptive "\hspace{.2in} 2008-2019 & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==1
tab state_abbr if division==1
	reg comm_dummy_grow_1 if division==1, cluster(id)
		file write descriptive "By geographic region: \\ \hspace{.2in} New England (CT, MA, ME, NH, RI, VT) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==9
tab state_abbr if division==9
	reg comm_dummy_grow_1 if division==9, cluster(id)
		file write descriptive "\hspace{.2in} Pacific (CA, OR, WA) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==8
tab state_abbr if division==8
	reg comm_dummy_grow_1 if division==8, cluster(id)
		file write descriptive "\hspace{.2in} Mountain (AZ, CO, ID, MT, NM, NV, UT, WY) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==2
tab state_abbr if division==2
	reg comm_dummy_grow_1 if division==2, cluster(id)
		file write descriptive "\hspace{.2in} Middle Atlantic (NJ, NY, PA) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==3
tab state_abbr if division==3
	reg comm_dummy_grow_1 if division==3, cluster(id)
		file write descriptive "\hspace{.2in} East North Central (IL, IN, MI, OH, WI) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==4
tab state_abbr if division==4
	reg comm_dummy_grow_1 if division==4, cluster(id)
		file write descriptive "\hspace{.2in} West North Central (IA, KS, MN, MO, ND, NE, SD) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==6
tab state_abbr if division==6
	reg comm_dummy_grow_1 if division==6, cluster(id)
		file write descriptive "\hspace{.2in} East South Central (AL, KY, MS, TN) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==5
tab state_abbr if division==5
	reg comm_dummy_grow_1 if division==5, cluster(id)
		file write descriptive "\hspace{.2in} South Atlantic (DC, DE, FL, GA, MD, NC, SC, VA, WV) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
tab state_name if division==7
tab state_abbr if division==7
	reg comm_dummy_grow_1 if division==7, cluster(id)
		file write descriptive "\hspace{.2in} West South Central (AR, LA, OK, TX) & " (string(e(N),"%9.0fc")) " & " (string(_b[_cons],"%9.2f")) " \\ " _n
file close descriptive




/////////////////////////
//SUMMARY FIGURES
use data/legacy_utility_data_controls, clear
xtset idnojumps year
bysort idnojumps: egen count=count(resi_cust1)
keep if count>=2 //otherwise graph has a bunch of one-year points
keep idnojumps company_name resi_cust1 year state_abbr portion_resi_sales
//scaling
gen temp1=resi_cust1 if year==1997
	bysort id: egen scale=max(temp1)
	drop temp1
	foreach x of numlist 1998(1)2019{
		gen temp1=resi_cust1 if year==`x' & scale==.
		bysort id: egen temp2=max(temp1)
		replace scale=temp2 if scale==.
		drop temp1 temp2
	}
replace resi_cust1=resi_cust1/scale
	format resi_cust1 %9.2f
set seed 305
	gen temp=runiform()
	bysort id: gen temp2=[_n]
	replace temp=. if temp2!=1
	bysort id: egen temp3=mode(temp)
	drop temp temp2
	keep if temp3<.042 //4% of obs
	drop temp3

tab company_name state if resi_cust1>1.7 & resi_cust1!=.
//BLACK HILLS ENERGY, CO
//YORK CTY NATURAL GAS AUTHORITY, SC
tab id if resi_cust1>1.7 & resi_cust1!=.

keep if portion_resi_sales>=.9
drop portion_resi_sales 

unique id //99
//decode id, gen(name)
drop scale company_name state
reshape wide resi_cust1 , i(year) j(id) 
tsset year
gen line=1
tw (tsline resi_cust1*, legend(off) graphregion(color(white)) ///
	yline(1, lcolor(black)) ) ///
	(tsline line, legend(off) graphregion(color(white)) ///
	ylabel(,nogrid) ///
	lcolor(black) lwidth(thick) yline(1, lcolor(black) lwidth(thick) ) )
graph export spaghetti1all.png, as(png) width(1200) replace
replace resi_cust1232=. if resi_cust1232>2
replace resi_cust12145=. if resi_cust12145>2
tw (tsline resi_cust1*, legend(off) graphregion(color(white)) ///
	yline(1, lcolor(black)) ) ///
	(tsline line, legend(off) graphregion(color(white)) ///
	ylabel(,nogrid angle(horizontal)) ///
	lcolor(black) lwidth(thick) yline(1, lcolor(black) lwidth(thick) ) ///
	ytitle("Residential customer count, normalized") )
graph export spaghetti1.png, as(png) width(3600) replace





use data/legacy_utility_data_controls, clear
//br if company_name=="BLACK HILLS ENERGY" & state_abbr=="CO"
//IOU
//around 35,000	customers at the beginning and 75,000 by the end
//new id in 2017
//https://energyoffice.colorado.gov/natural-gas
//e.g. outside of colorado springs, that was experiencing rapid growth in 2000s
//https://www.colorado.gov/pacific/sites/default/files/atoms/files/Colorado%20Utilities%20Report%202010.pdf
//this verifies the approximate magnitude of customer count
//largest city I could find: castle rock, with more than 100% growth in the 2000s
	//https://www.blackhillsenergy.com/sites/blackhillsenergy.com/files/cog-bhcg-tariffs.pdf
//br if company_name=="YORK CTY NATURAL GAS AUTHORITY" & state_abbr=="SC"
//not IOU or muni
//around 24,000	customers at the beginning and 55,000 by the end
//wikipedia says york county had 25% growth in the 2000s
	
//we observe XX utilities that experience five or more consecutive years of customer base growth, but also YY utilities that experience five or more consecutive years of customer base loss.
use data/legacy_utility_data_controls, clear

unique id if regime_length>=5 & resi_dummy_grow_1==1	//324
unique id if regime_length>=5 & resi_dummy_grow_1==0   //250
	
//compare to:	
unique idnojumps if regime_length>=5 & resi_dummy_grow_1==1	//327
unique idnojumps if regime_length>=5 & resi_dummy_grow_1==0	//254
	
unique id if regime_length>=5 & resi_dummy_grow_1==1 & comm_dummy_grow_1==1	//306
unique id if regime_length>=5 & resi_dummy_grow_1==0 & comm_dummy_grow_1==0 //239
	
unique id if resi_regime_length>=5 & resi_dummy_grow_1==1	//535
unique id if resi_regime_length>=5 & resi_dummy_grow_1==0 //570
	
xtset id year	
unique id if regime_length>=5 & resi_dummy_grow_1==1 ///
	& portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 //315
unique id if regime_length>=5 & resi_dummy_grow_1==0 ///
	& portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 //247



	
	
	


///////////////////////////
//MAPS
/*
ssc install spmap
ssc install maptile
maptile_install using "http://files.michaelstepner.com/geo_state.zip"
*/

//ious only
use data/legacy_utility_data_controls, clear
keep if iou==1
xtset id year
reg dlnresi_nr1 dlnresi_grow1 dlnresi_shrink1 resi_dummy_grow_1 ///
	if portion_resi_sales>=.9 & abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5, cluster(id) 
keep if e(sample)
collapse (mean) resi_dummy_grow_1, by(state_abbr)
sum resi_dummy_grow_1  if state_abbr!="AK" & state_abbr!="HI" & state_abbr!="PR" //min 0.26
rename state_abbr state
sum resi_dummy_grow_1
replace resi=0.25 if resi>0.25 & resi<0.27 & resi!=. //so the legend starts at 0.25 instead of 0.26
maptile resi_dummy_grow_1, geo(state) mapif(state!="AK" & state!="HI") ///
	cutvalues(0.5 0.75) fcolor(YlOrRd) ///
	twopt(legend(size(vlarge) title("Proportion growing:", size(vlarge)) position(7) ) )
graph export mapious.png, as(png) width(1200) replace
hist resi_dummy if state!="AK" & state!="HI" & state!="PR", bin(20) xline(0.5 0.75)
gsort -resi_dummy_grow
//br

//munis and coops only
use data/legacy_utility_data_controls, clear
keep if iou!=1
xtset id year
reg dlnresi_nr1 dlnresi_grow1 dlnresi_shrink1 resi_dummy_grow_1 ///
	if portion_resi_sales>=.9 & abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5, cluster(id) 
keep if e(sample)
collapse (mean) resi_dummy_grow_1, by(state_abbr)
sum resi_dummy_grow_1  if state_abbr!="AK" & state_abbr!="HI" & state_abbr!="PR" //min 0.28
rename state_abbr state
sum resi_dummy_grow_1
replace resi=0.25 if resi>0.25 & resi<0.29 & resi!=. //so the legend starts at 0.25 instead of 0.26
maptile resi_dummy_grow_1, geo(state) mapif(state!="AK" & state!="HI") ///
	cutvalues(0.5 0.75) fcolor(YlOrRd) ///
	twopt(legend(size(large) title("Proportion growing:", size(large)) position(7) ) ) ///
	ndfcolor(white)
graph export mapmunis.png, as(png) width(1200) replace
rename state state_abbr
merge 1:1 state_abbr using data/fips/state_fips
list state_name state_abbr  if resi_dummy_grow_1==.
hist resi_dummy if state_abbr!="AK" & state_abbr!="HI" & state_abbr!="PR", bin(20) xline(0.5 0.75)
gsort -resi_dummy_grow
//br

//white states in muni/coop map:
insheet using data/eia176/type_of_operations_and_sector_176.csv, names clear
//br if state=="DC"	// no munis or coops; good
sort company year
/*
br if state=="DE"	// no munis or coops; good	
br if state=="ID"	// no munis or coops; good	
br if state=="ME"	// no munis or coops; good	
br if state=="NH"	// no munis or coops; good	
br if state=="NJ"	// no munis or coops; good	
br if state=="NV"	// no munis or coops; good	
br if state=="OR"	// no munis or coops; good	
br if state=="RI"	// no munis or coops; good	
br if state=="VT"	// no munis or coops; good	
*/


	

	
/////////////////////////////////
//LOWESS FOR MAIN REGRESSION
use data/legacy_utility_data_controls, clear
bysort id: egen minmmiles=min(mmiles)
xtset idnojumps year
gen dlnresi_cust1=d.lnresi_cust1
format %9.1f dlnmmiles
format %9.1f dlnresi_cust1
keep if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 ///
	& regime_length>=2 ///	
	& minmmiles>=20 & abs(dlnmmiles)<=1

tw 	(sc dlnmmiles dlnresi_cust1 if abs(dlnmmiles)<.5 [weight=resi_cust1], ///
		msize(vsmall) mcolor("166 206 227") ylabel(,angle(horizontal)) ) ///
	(hist dlnresi_cust1, yaxis(2) fcolor(none) lcolor("51 160 44") bin(30) ///
		yscale(range(0 100) axis(2)) ytitle("", axis(2)) ylabel(none, axis(2))) ///
	(lowess dlnmmiles dlnresi_cust1, lcolor("31 120 180") lwidth(thick) ///
		xline(0, lcolor(gs8)) yline(0, lcolor(gs8))  ///
		graphregion(color(white)) legend(off) xtitle("Change in log residential count") ///
		ytitle("Change in log miles") ylabel(, nogrid)) 
graph export mileslowess.png, as(png) width(3600) replace	


		
hist dlnmmiles if abs(dlnmmiles)<.5, ///
	fcolor(none) lcolor(gs8)  ///
	start(-0.5) width(0.02) ///
	graphregion(color(white)) ///
	xtitle("Change in log miles")
graph export mileshistogram.png, as(png) width(1200) replace		

count if dlnmmiles!=. & abs(dlnmmiles)<.5
local temp1=r(N)
count if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles<0
local temp2=r(N)
count if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles==0
local temp3=r(N)
count if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles>0
local temp4=r(N)
disp `temp2'/`temp1' //5%
disp `temp3'/`temp1' //35%
disp `temp4'/`temp1' //60%
disp `temp1'-`temp2'-`temp3'-`temp4'

sum lnmmiles if dlnmmiles!=. & abs(dlnmmiles)<.5
sum lnmmiles if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles<0
//similar means
//br company_id company_name year state_abbr lnmmiles if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles<0
//gsort -mmiles
sum dlnmmiles if dlnmmiles!=. & abs(dlnmmiles)<.5 & dlnmmiles<0, detail



use data/legacy_utility_data_controls, clear
xtset idnojumps year
gen dlnresi_cust1=d.lnresi_cust1
format %9.1f dlnresi_nr1
format %9.1f dlnresi_cust1
	local resi_nrpc1limit =350*10
	local resi_nrpq1limit =6*10
	local comm_nrpc1limit =1300*10
	local comm_nrpq1limit =5*10

keep if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 ///
	& regime_length>=2 ///
	& abs(dlnresi_nr1)<2 ///
	& resi_nrpc1<`resi_nrpc1limit' & resi_nrpq1<`resi_nrpq1limit'
	
tw (sc dlnresi_nr1 dlnresi_cust1 [weight=resi_cust1], msize(vsmall) mcolor(gs8) ) ///
	(hist dlnresi_cust1, yaxis(2) fcolor(none) lcolor(gs8) bin(30) ///
		yscale(range(0 100) axis(2)) ytitle("", axis(2)) ylabel(none, axis(2))) ///
	(lowess dlnresi_nr1 dlnresi_cust1, lcolor(black) lwidth(medthick) ///
	xline(0, lcolor(black)) yline(0, lcolor(black))  ///
	graphregion(color(white)) legend(off) xtitle("Change in log residential count") ///
	ytitle("Change in log residential net revenue") ylabel(, nogrid))
graph export nrresilowess.png, as(png) width(1200) replace
	

use data/legacy_utility_data_controls, clear
xtset idnojumps year
gen dlncomm_cust1=d.lncomm_cust1
format %9.1f dlncomm_nr1
format %9.1f dlncomm_cust1
	local resi_nrpc1limit =350*10
	local resi_nrpq1limit =6*10
	local comm_nrpc1limit =1300*10
	local comm_nrpq1limit =5*10

keep if portion_resi_sales>=.9 ///
	& abs(d.lnresi_cust1)<.2 & abs(d.lncomm_cust1)<.5 ///
	& regime_length>=2 ///
	& abs(dlncomm_nr1)<2 ///
	& comm_nrpc1<`comm_nrpc1limit' & comm_nrpq1<`comm_nrpq1limit'
tw (sc dlncomm_nr1 dlncomm_cust1 [aweight=comm_cust1], msize(vsmall) mcolor(gs8) ) ///
	(hist dlncomm_cust1, yaxis(2) fcolor(none) lcolor(gs8) bin(40) ///
		yscale(range(0 100) axis(2)) ytitle("", axis(2)) ylabel(none, axis(2))) ///
	(lowess dlncomm_nr1 dlncomm_cust1, lcolor(black) lwidth(medthick) ///
	xline(0, lcolor(black)) yline(0, lcolor(black))  ///
	graphregion(color(white)) legend(off) xtitle("Change in log commercial count") ///
	ytitle("Change in log commercial net revenue") ylabel(, nogrid))
graph export nrcommlowess.png, as(png) width(1200) replace	


/////////////////////////
//NUMERICAL EXAMPLE LOSING 5% OF CUSTOMER BASE
disp 10000 * 300 / 10^6 //net revenue in millions, initially
disp 10000 * .05 //loses five percent of its customer base
disp 10000 * 300 / 10^6 * .95 //net revenue if prices don't change, millions
disp .5 * .05 //net revenue actually decreases by X percent, using table estimates
disp 10000 * 300 / 10^6 * (1- .5*.05) //net revenue using estimates, millions
disp 10000 * 300 * (1- .5*.05) / (10000 * .95) //per customer 
disp (10000 * 300 * (1- .5*.05) / (10000 * .95) - 300) / 300 //percent per customer




///////////////////////
//DEMOGRAPHIC CHARACTERISTICS OF EIGHT LARGEST LOSSES AND EIGHT LARGEST GAINS
forval v=1(1)4{
	insheet using data/census/cities/quickfacts`v'.csv, comma clear
	drop in 63/83
	save data/census/cities/quickfacts`v', replace
}
clear all
use data/census/cities/quickfacts1, clear
forval v=2(1)4{
	merge 1:1 fact using data/census/cities/quickfacts`v', nogen
}
drop valuenote* factnote
keep if fact=="Persons in poverty, percent" ///
	| fact=="Black or African American alone, percent" ///
	| fact=="Per capita income in past 12 months (in 2019 dollars), 2015-2019" ///
	| fact=="Population, Census, April 1, 2010" ///
	| fact=="Population estimates base, April 1, 2010,  (V2019)" ///
	| fact=="Population estimates, July 1, 2019,  (V2019)" ///
	| fact=="Population, percent change - April 1, 2010 (estimates base) to July 1, 2019,  (V2019)"
foreach v of varlist*{
	rename `v' v`v'
}
rename vfact fact
reshape long v, i(fact) j(value) string
replace fact="black" if strmatch(fact,"*Afr*")
replace fact="poverty" if strmatch(fact,"*poverty*")
replace fact="income" if strmatch(fact,"*income*")
replace fact="pop2010census" if fact=="Population, Census, April 1, 2010"
replace fact="pop2010estbase" if fact=="Population estimates base, April 1, 2010,  (V2019)"
replace fact="pop2019est" if fact=="Population estimates, July 1, 2019,  (V2019)"
replace fact="popchange20102019" if fact=="Population, percent change - April 1, 2010 (estimates base) to July 1, 2019,  (V2019)"
reshape wide v, i(value) j(fact) string
destring v*, replace ignore("$%,")
replace vblack=round(vblack,1)
replace vpoverty=round(vpoverty,1)
replace vincome=round(vincome/1000,1)
replace value=subinstr(value,"city"," ",.)
replace value=word(value,1)
replace value="saltlakecity" if value=="saltlake"
save data/census/cities/quickfacts, replace

insheet using data/census/cities/sub-est00int.csv, comma clear
keep if strmatch(name,"*city*")
rename stname state_name
replace state_name=upper(state_name)
merge m:1 state_name using data/fips/state_fips, keep(master match) nogen 
drop state_name state county place cousub fips
gen value=lower(subinstr(name," city","",1))
replace value=subinstr(value," ","",.)
keep if sumlev==162 //Incorporated place 
save data/census/cities/sub-est00int, replace


use data/legacy_utility_data_controls, clear
//not including ones that ever have a 20% year-on-year jump
keep if year==1997 | year==2019
keep year idnojumps company_name_eia state_abbr resi_total_custom
reshape wide resi_total_custom, i(idnojumps company state) j(year)
gen diff=resi_total_custom2019-resi_total_custom1997
gsort diff
list company_name state if [_n]<=8, sep(0)
keep if [_n]<=8
gen name=proper(company)
gen largest_city="Birmingham" if company_name=="ALABAMA GAS CORP"
	replace largest_city="Little Rock" if company_name=="CENTERPOINT ENERGY ARKLA" & state=="AR"
	replace largest_city="Mobile" if company_name=="SPIRE GULF INC" & state=="AL"
	replace largest_city="Lawton" if company_name=="CENTERPOINT ENERGY ARKLA" & state=="OK"
	replace largest_city="Philadelphia" if company_name=="PHILADELPHIA GAS WORKS" 
	replace largest_city="Charleston" if company_name=="HOPE GAS INC"
	replace largest_city="Shreveport" if company_name=="CENTERPOINT ENERGY ENTEX" & state=="LA"
	replace largest_city="Albany" if company_name=="ALBANY WTR GAS LT COMM" & state=="GA"
gen value=lower(subinstr(largest_city," ","",.))
merge 1:1 value using data/census/cities/quickfacts, keep(master match) nogen
merge 1:m value state_abbr using data/census/cities/sub-est00int, keep(master match) nogen
corr *2000*
sum *2000*
corr *2010*
sum *2010*
gsort diff
gen popchange=round(100*(vpop2019est-popestimate2000)/popestimate2000,1)
corr popchange vpopchange20102019
sum popchange vpopchange20102019
sum vpoverty vincome, detail
keep state name resi_total_custom1997 diff largest_city vpoverty popchange vblack vincome
order state name resi diff largest pop vblack vpoverty vincome
tostring resi diff, format(%11.0fc) replace force
tostring _all, replace
replace vincome=vincome+" \\"
export delimited using demographicsA.tex, delimiter("&") novarnames replace



use data/legacy_utility_data_controls, clear
//not including ones that ever have a 20% year-on-year jump
keep if year==1997 | year==2019
keep year idnojumps company_name_eia state_abbr resi_total_custom
reshape wide resi_total_custom, i(idnojumps company state) j(year)
gen diff=resi_total_custom2019-resi_total_custom1997
gsort -diff
list company_name state if [_n]<=8, sep(0)
keep if [_n]<=8
gen name=proper(company)
replace name="Southern California Gas" if name=="Southern California Gas Company" //save space
gen largest_city="Aurora" if name=="Nicor Gas"
	replace largest_city="Las Vegas" if name=="Southwest Gas Corporation" & state=="NV"
	replace largest_city="Salt Lake City" if name=="Questar Gas Company" & state=="UT"
	replace largest_city="Denver" if name=="Pub Service Co Of Colorado"
	replace largest_city="Phoenix" if name=="Southwest Gas Corporation" & state=="AZ"
	replace largest_city="Houston" if name=="Centerpoint Energy Entex"
	replace largest_city="San Jose" if name=="Pacific Gas"
	replace largest_city="Los Angeles" if name=="Southern California Gas"
gen value=lower(subinstr(largest_city," ","",.))
merge 1:1 value using data/census/cities/quickfacts, keep(master match) nogen
merge 1:m value state_abbr using data/census/cities/sub-est00int, keep(master match) nogen
corr *2000*
sum *2000*
corr *2010*
sum *2010*
gsort diff
gen popchange=round(100*(vpop2019est-popestimate2000)/popestimate2000,1)
corr popchange vpopchange20102019
sum popchange vpopchange20102019
sum vpoverty vincome, detail
keep state name resi_total_custom1997 diff largest_city vpoverty popchange vblack vincome
order state name resi diff largest pop vblack vpoverty vincome
tostring resi diff, format(%11.0fc) replace force
tostring _all, replace
replace vincome=vincome+" \\"
export delimited using demographicsB.tex, delimiter("&") novarnames replace



use data/legacy_utility_data_controls, clear
//not including ones that ever have a 20% year-on-year jump
keep if year==1997 | year==2019
keep year idnojumps company_name_eia state_abbr resi_total_custom 
reshape wide resi_total_custom, i(idnojumps company state) j(year)
gen diff=resi_total_custom2019-resi_total_custom1997
gsort -diff
keep if [_n]<=8
merge 1:m company_name_eia state_abbr idnojumps ///
	using data/legacy_utility_data_controls, keep(master match) nogen
keep idnojumps company_name resi_cust1 year state_abbr
//scaling
gen temp1=resi_cust1 if year==1997
	bysort id: egen scale=max(temp1)
	drop temp1
replace resi_cust1=resi_cust1/scale
	format resi_cust1 %9.2f
	drop scale
replace company_name=proper(subinstr(company_name," ","_",.))+"_"+state	
	drop state idnojumps
rename resi_cust1 r
replace company_name="Southern_California_Gas_CA" if company=="Southern_California_Gas_Company_CA"
levelsof company_name, local(levels)
reshape wide r , i(year) j(company) string
foreach v of local levels{
	label variable r`v' "`v'"
}
tsset year
gen line=1
tw (tsline r*, graphregion(color(white)) ///
	yline(1, lcolor(black)) ylabel(,nogrid) legend(col(1)) ///
	ytitle("Residential customer" "count, normalized")) 
graph export spaghettigrow.png, as(png) width(1200) replace
	
use data/legacy_utility_data_controls, clear
//not including ones that ever have a 20% year-on-year jump
keep if year==1997 | year==2019
keep year idnojumps company_name_eia state_abbr resi_total_custom 
reshape wide resi_total_custom, i(idnojumps company state) j(year)
gen diff=resi_total_custom2019-resi_total_custom1997
gsort diff
keep if [_n]<=8
merge 1:m company_name_eia state_abbr idnojumps ///
	using data/legacy_utility_data_controls, keep(master match) nogen
keep idnojumps company_name resi_cust1 year state_abbr
//scaling
gen temp1=resi_cust1 if year==1997
	bysort id: egen scale=max(temp1)
	drop temp1
replace resi_cust1=resi_cust1/scale
	format resi_cust1 %9.2f
	drop scale
replace company_name=proper(subinstr(company_name," ","_",.))+"_"+state	
	drop state idnojumps
rename resi_cust1 r
levelsof company_name, local(levels)
reshape wide r , i(year) j(company) string
foreach v of local levels{
	label variable r`v' "`v'"
}
tsset year
gen line=1
tw (tsline r*, graphregion(color(white)) ///
	yline(1, lcolor(black)) ylabel(,nogrid) legend(col(1)) ///
	ytitle("Residential customer" "count, normalized")) 
graph export spaghettishrink.png, as(png) width(1200) replace



/////////////////
//SIMULATING REVENUE REQUIREMENT WHEN GAINING OR LOSING CUSTOMERS
clear all
set obs 100

local year0customer=100000
gen percentage_move=[_n]-1
gen percentage_stay=100-percentage_move
gen number_move=`year0customer' * percentage_move / 100
gen number_stay=`year0customer' * percentage_stay / 100

//version 1: using empirical estimates from Table 4
local coststayers_table4=328
local costmovers_table4=328*(1-0.47)
	disp `coststayers_table4'
	disp `costmovers_table4'
//version 2: using expenditures categories from Table 6
disp (312*(1)+63*(0)+105*(0)+85*(.5)+66*(.1)+25*(.9))/(312+63+105+85+66+25) 
	//for taxes we use: .4 moves; .6 stays
local coststayers_table6=63+105+85+66+25+47
local costmovers_table6=63*(1-0)+105*(1-0)+85*(1-.5)+66*(1-.1)+25*(1-.9)+47*(1-.6)
	disp `coststayers_table6'
	disp `costmovers_table6'
//version 3: Table 6, no capital expenditures
local coststayers_nocap=85+66+25+47
local costmovers_nocap=85*(1-.5)+66*(1-.1)+25*(1-.9)+47*(1-.6)
	disp `coststayers_table6'
	disp `costmovers_table6'

foreach v in table4 table6 nocap{
	gen coststayers_total_`v'=number_stay * `coststayers_`v''
	gen costmovers_total_`v'=number_move * `costmovers_`v''
	gen revreq_`v'=coststayers_total_`v' + costmovers_total_`v'
	gen nrpc_`v'=revreq_`v'/number_stay
}

//numbers for intro, text, conclusion:
sum nrpc_table4 if percentage_move==15
	disp r(mean)-`coststayers_table4'
sum nrpc_table4 if percentage_move==20
	disp r(mean)-`coststayers_table4'
sum nrpc_table4 if percentage_move==40
	disp r(mean)-`coststayers_table4'	
//compare:
sum nrpc_table6 if percentage_move==15
	disp r(mean)-`coststayers_table6'
sum nrpc_nocap if percentage_move==15
	disp r(mean)-`coststayers_nocap'
sum nrpc_table6 if percentage_move==20
	disp r(mean)-`coststayers_table6'
sum nrpc_nocap if percentage_move==20
	disp r(mean)-`coststayers_nocap'
sum nrpc_nocap if percentage_move==40
	disp r(mean)-`coststayers_nocap'
sum nrpc_table6 if percentage_move==40
	disp r(mean)-`coststayers_table6'
sc nrpc* percentage_move if percentage_move<91	

	
sc nrpc_table4 nrpc_table6 nrpc_nocap percentage_move if percentage_move<80, ///
	yscale(range(0(300)1500)) ylabel(0(300)1500, angle(horizontal)) ///
	xscale(range(0(5)105)) xlabel(0(20)80) ///
	graphregion(color(white)) ///
	ytitle("Dollars (net revenue) per" "residential customer per year" " ") ///
	xtitle("Percentage of residential customers exiting the natural gas system") ///
	legend(off) ///
	connect(l l l) msymbol(none none none) ///
	lwidth(vthick thick thick) ///
	lcolor("31 120 180" "166 206 227" "166 206 227") ///
	xsize(8) ysize(5) ///
	text(1490 91 "Using assumptions" "on costs," "including capital", color("166 206 227")) ///
	text(980 91 "Using our empirical" "estimates of past" "price responses", color("31 120 180")) ///
	text(690 91 "Using assumptions" "on costs," "excluding capital", color("166 206 227")) 
graph export simulate.png, as(png) width(3600) replace




//scaling by 0.7
use data/legacy_utility_data_controls, clear
gen resi_bill=resi_sales_revenue_nominal/resi_sales_custom
egen any_bill_numerator=rowtotal(resi_sales_revenue_nominal comm_sales_revenue_nominal indu_sales_revenue_nominal elec_sales_revenue)
egen any_bill_denominator=rowtotal(resi_sales_custom comm_sales_custom indu_sales_custom elec_sales_custom)
gen any_bill=any_bill_numerator/any_bill_denominator
gen ratio=resi_bill/any_bill
sum ratio if year==2019, detail
sum ratio if year>=2009, detail
sum ratio, detail	
	
	
	
	
/////////////////////////////////
//WALKING THROUGH IDENTIFICATION USING DTE 
use data/legacy_utility_data_controls, clear

replace mmiles=mmiles/10^3
label variable mmiles "Miles, thousands"
replace resi_grow1=resi_grow1/10^6
label variable resi_grow1 "Residential customers, millions, when growing"
replace resi_shrink1=resi_shrink1/10^6
label variable resi_shrink1 "Residential customers, millions, when shrinking"
replace resi_cust1=resi_cust1/10^6
format %9.2f resi_grow1
format %9.2f resi_shrink1
format %9.2f resi_cust1

//time series, with grow - shrink - grow:
twoway (sc mmiles year if strmatch(company_name,"DTE*"), connect(l) msymbol(none) lcolor(black)) ///
	(sc resi_cust1 year if strmatch(company_name,"DTE*"), ///
		msymbol(none) connect(l) lcolor(gs8) yaxis(2)) ///
	(sc resi_grow1 year if strmatch(company_name,"DTE*") & resi_grow1!=0, ///
		msymbol(S) mcolor(gs8) lcolor(gs8) yaxis(2)) /// 
	(sc resi_shrink1 year if strmatch(company_name,"DTE*") & resi_shrink1!=0, ///
		msymbol(O) mcolor(black)  lcolor(gs8) yaxis(2) ///
	legend(col(1) order(1 3 4)) graphregion(color(white)) ///
	ytitle("Residential customers, millions", axis(2)) )
graph export fe1.png, as(png) width(1200) replace	
//symmetric linear fit:	
twoway (sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0, ///
		msymbol(S) mcolor(gs8) ) ///
	(sc mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0, ///
		msymbol(O) mcolor(black) ) ///
		(lfit mmiles resi_cust1 if strmatch(company_name,"DTE*"), ///
		lcolor(black) ///
		graphregion(color(white)) legend(off) ///
		ytitle("Miles, thousands") xtitle("Residential customers, millions") )	
graph export fe2.png, as(png) width(1200) replace	
//asymmetric linear fit:	
twoway (sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0, ///
		msymbol(S) mcolor(gs8) ) ///
	(sc mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0, ///
		msymbol(O) mcolor(black) ) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0, ///
		lcolor(gs8) ) ///
	(lfit mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 ///
		, lcolor(black) ///
		graphregion(color(white)) legend(off) ///
		ytitle("Miles, thousands") xtitle("Residential customers, millions") )	
graph export fe3.png, as(png) width(1200) replace	
//asymmetric per-period linear fit:	
twoway (sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year<2007, ///
		msymbol(S) mcolor(gs8)) ///
	(sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2007, ///
		msymbol(S) mcolor(gs8)) ///
	(sc mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0, ///
		msymbol(O) mcolor(black)) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year<2007, ///
		lcolor(gs8)) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2007, ///
		lcolor(gs8)) ///
	(lfit mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 ///
		, lcolor(black) ///
		graphregion(color(white)) legend(off) ///
		ytitle("Miles, thousands") xtitle("Residential customers, millions") )	
graph export fe4.png, as(png) width(1200) replace	
//supposing we only observed dte some years - an example where regime effects matter a lot:
twoway (sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2001 & year<2016, ///
		msymbol(S) mcolor(gs8) ) ///
	(sc mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 & year>2001 & year<2016, ///
		msymbol(O) mcolor(black) ) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2001 & year<2016, ///
		lcolor(gs8) ) ///
	(lfit mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 & year>2001 & year<2016 ///
		, lcolor(black) ///
		graphregion(color(white)) legend(off) ///
		ytitle("Miles, thousands") xtitle("Residential customers, millions") )	
graph export fe5.png, as(png) width(1200) replace	
twoway (sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year<2007 & year>2001 & year<2016, ///
		msymbol(S) mcolor(gs8)) ///
	(sc mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2007 & year>2001 & year<2016, ///
		msymbol(S) mcolor(gs8)) ///
	(sc mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 & year>2001 & year<2016, ///
		msymbol(O) mcolor(black)) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year<2007 & year>2001 & year<2016, ///
		lcolor(gs8)) ///
	(lfit mmiles resi_grow1 if strmatch(company_name,"DTE*") & resi_grow1!=0 & year>2007 & year>2001 & year<2016, ///
		lcolor(gs8)) ///
	(lfit mmiles resi_shrink1 if strmatch(company_name,"DTE*") & resi_shrink1!=0 & year>2001 & year<2016 ///
		, lcolor(black) ///
		graphregion(color(white)) legend(off) ///
		ytitle("Miles, thousands") xtitle("Residential customers, millions") )	
graph export fe6.png, as(png) width(1200) replace	




/////////////////
//EXIT FEES, PDV
global costmovers=105*(1-0)+63*(1-0)+85*(1-.5)+66*(1-.1)+25*(1-.9)+47*(1-.6)
disp $costmovers
disp $costmovers+$costmovers/1.03+$costmovers/(1.03^2)+$costmovers/(1.03^3)+$costmovers/(1.03^4)


//////////////////////
//EXAMPLES WHERE ID CHANGES FOR APPENDIX
use data/legacy_utility_data_controls, clear
label variable resi_cust1 "Residential customers"

twoway (sc resi_cust1 year if company_id=="17616572MO" & idnojumps==2245, mcolor(black)) ///
	(sc resi_cust1 year if company_id=="17616572MO" & idnojumps==2246, mcolor(gs8) msymbol(Sh) ///		
	graphregion(color(white)) legend(off) title("Company 17616572MO") )	
graph export id_jumps1.png, as(png) width(1200) replace

twoway (sc resi_cust1 year if company_id=="17600043IL" & idnojumps==21, mcolor(black)) ///
	(sc resi_cust1 year if company_id=="17600043IL" & idnojumps==22, mcolor(gs8) msymbol(Sh)) ///
	(sc resi_cust1 year if company_id=="17600043IL" & idnojumps==23, mcolor(gs11) msymbol(S) ///		
	graphregion(color(white)) legend(off) title("Company 17600043IL") )	
graph export id_jumps2.png, as(png) width(1200) replace
